***************************************************************************************************************
/*
THIS DO-FILE: 

1. Appends PSID imputation sample to HRS sample
2. Imputes child assets for HRS sample
3. Saves kid assets for HRS sample as "imputed_kid_assets.dta"

*/
***************************************************************************************************************

use "my_sample_long.dta", clear
*keep if cohort==1 & inw==1

* Prepare kid characteristics. Missing values are replaced by a default
* ("average") value; these filled-in values are only meant to serve as
* predictors in the imputation further down.
*
* missing() is used instead of ==. so that extended missing codes (.a-.z)
* are treated the same way as system missing.
replace kmale    = 0 if missing(kmale)     // set missings to female
replace kmarried = 0 if missing(kmarried)  // set missings to not married
replace keducbin = 2 if missing(keducbin)  // set missings to HS

* Number of children of the kid: take the value from the next wave when the
* current one is missing (two passes => reaches up to two waves ahead),
* top-code at 5, and default to 0.
* The data stay sorted by hhidpn wave for every by-group below, because only
* non-key variables are modified after this sort.
gen kchildren = kkids if kkids<.
sort hhidpn wave
by hhidpn: replace kchildren = kchildren[_n+1] if kchildren==.
by hhidpn: replace kchildren = kchildren[_n+1] if kchildren==.
replace kchildren = 5 if kchildren>=5 & kchildren<.
replace kchildren = 0 if kchildren==.      // set missings to 0 children

* Home ownership: same two-pass fill from the following waves, default 0.
by hhidpn: replace kownhome = kownhome[_n+1] if missing(kownhome)
by hhidpn: replace kownhome = kownhome[_n+1] if missing(kownhome)
replace kownhome = 0 if missing(kownhome)  // set missings to not owning a home

* Income category: two-pass fill from the following waves, then fall back to
* the largest category observed for the same hhidpn in any wave.
gen kinctemp = kinc if kinc<.
by hhidpn: replace kinctemp = kinctemp[_n+1] if kinctemp==.
by hhidpn: replace kinctemp = kinctemp[_n+1] if kinctemp==.
bys hhidpn: egen kinctemp2 = max(kinctemp)
replace kinctemp = kinctemp2 if kinctemp==.

* Income-category dummies. Observations whose category is still missing are
* assigned to category 2 (10-35K): a comparison with missing evaluates to 0,
* so only kincbin2 needs the explicit missing() term.
* NOTE(review): kincbin4 is 1 for categories 3 AND 4, so it overlaps kincbin3.
* Left unchanged because it may be deliberate - confirm against the coding of
* kinc and of kincbin* in the PSID imputation sample.
gen kincbin1 = (kinctemp==1)
gen kincbin2 = (kinctemp==2) | missing(kinctemp)
gen kincbin3 = (kinctemp==3)
gen kincbin4 = inlist(kinctemp,3,4)
gen kincbin5 = (kinctemp==5)

* Survey year for each wave: wave 4 -> 1998, then two years per wave up to
* wave 12 -> 2014. Any other wave value leaves year missing.
gen year = 1990 + 2*wave if inlist(wave,4,5,6,7,8,9,10,11,12)

* Stack the PSID imputation sample underneath the sample in memory.
append using "kid_assets_PSIDimputation.dta"

* Powers of kid age: kage2 = kage*kage, ..., kage4 = kage*kage*kage*kage.
local kagepower "kage"
forvalues p = 2/4 {
	local kagepower "`kagepower'*kage"
	gen kage`p' = `kagepower'
}

* Harmonize names across the two datasets: where age/assets/income is
* missing, take the value from page/passets/pinctot, then drop those.
foreach pair in "age page" "assets passets" "income pinctot" {
	gettoken target source : pair
	replace `target' = `source' if `target'==.
}
drop page passets pinctot

* Squared terms.
foreach v in age assets {
	gen `v'2 = `v'*`v'
}

* Quintiles of income and assets, computed over the stacked sample.
xtile incpctile = income, nquantiles(5)
xtile asspctile = assets, nquantiles(5)

* Winsorize kassets at the 10th and 90th percentiles before imputing.
* NOTE(review): an earlier comment here said the imputation is done in logs,
* but no log transform appears in this file - kassets is imputed in levels.
* NOTE(review): winsor2 does not look like a built-in command; make sure it
* is installed before running.
winsor2 kassets, replace cuts(10 90)

* Put things in more discrete bins.
* Every bin is guarded with missing() so that extended missing codes (.a-.z)
* stay missing rather than satisfying a "!=." test and being placed in a bin.

* Indicator: kid has at least one child.
gen kanykids = (kchildren>=1) if !missing(kchildren)

* Kid age bins: 1 = <40, 2 = 40-44, 3 = 45-49, 4 = 50-54, 5 = 55-59,
* 6 = 60-64, 7 = 65+. Each threshold that is reached adds one to the bin.
gen kagebin = 1 + (kage>=40) + (kage>=45) + (kage>=50) + (kage>=55) ///
	+ (kage>=60) + (kage>=65) if !missing(kage)

* Age bins: 1 = <65, 2 = 65-69, 3 = 70-74, 4 = 75-79, 5 = 80-84,
* 6 = 85-89, 7 = 90+.
gen agebin = 1 + (age>=65) + (age>=70) + (age>=75) + (age>=80) ///
	+ (age>=85) + (age>=90) if !missing(age)

* IMPUTE
* Predictive mean matching for kassets: a single imputation (add(1)) drawn
* from the 100 nearest neighbours (knn(100)) with a fixed seed.
* The order of the predictors is kept as is so the seeded draw is unchanged.
mi set wide
mi register imputed kassets
mi impute pmm kassets ///
	kincbin* kmale kmarried i.keducbin kanykids ///
	i.kagebin i.agebin i.incpctile i.asspctile i.year, ///
	add(1) knn(100) rseed(12486) force

* Replace the data in memory with imputation 1.
mi extract 1, clear

* Keep everything not flagged as PSID (a missing flag is kept as well), then
* reduce to the identifiers and the kid assets and save.
keep if PSIDsample!=1
keep hhidpn wave kassets
save "imputed_kid_assets.dta", replace
******************************************************************************************
